'''


exec(''.join(open("/home/dalton_m/ppp/pyfiles/ANpppv12.py", encoding="utf8").readlines()[:]))
nohup python3 /home/dalton_m/ppp/pyfiles/ANpppv12.py  | tee &
'''

import sys
import pandas as pd
import numpy as np



filename = 'ANpppv12pt1'

dataloc=  "/dataERS/eract/daltonm/"
resultsloc="/workERS/daltonm/corona/ppp/results"


import os

sys.path.append('/workERS/daltonm/BG')
#try:
from CRmergeBGLDBv6func import *

sys.path.append('/workERS/daltonm/corona')
from basicfunctions import *

resultsloc1 = "/home/daltonm/ppp/"

from datetime import date
datestr = date.today().strftime(format="%Y%m%d")
resultsloc = resultsloc1 + datestr + '/'
if not os.path.exists(resultsloc):
    os.makedirs(resultsloc)

'''
make PPP loan amounts + counts 
by tract - 4 digit industry
includes cnty and cz, too
'''
kcols = ['ID', 'DateApproved', 'CurrentApprovalAmount', 'NAICSCode']
dfp = pd.read_csv(dataloc + 'pppfiles/all_ppp.psv', sep = '|')[kcols]

# get address info
# prelim info for reading in
dft1 = pd.read_excel(geog + 'geocodeoutput.xlsx', engine='openpyxl')
dft1.dropna(inplace=True)
widths = dft1['widths'].values.tolist()
cols = dft1['names'].values.tolist()
widths = [int(i) for i in widths]
cols = [i.strip() for i in cols]
# reading in actual data
dft2 = pd.read_fwf(dataloc + 'pppfiles/AutoGeo_ppp.txt',
                   error_bad_lines=False,
                   widths=widths, names=cols)
dft2['ID'] = dft2['All the data from the Input record, unchanged.'].apply(lambda x: str(x)[:20].strip())
dft2['fipscounty'] = dft2['County FIPS Code'].apply(lambda x: float(x) if x != np.NaN else np.NaN)
dft2['fipsstate'] = pd.to_numeric(dft2['FIPS State Code'], errors='coerce')
dft2['tract'] = pd.to_numeric(dft2['Census Tract'], errors='coerce')
dft2['ID'] = pd.to_numeric(dft2['ID'], errors='coerce')

kcols = ['ID', 'fipscounty', 'tract', 'fipsstate']
mc = ['ID']
dfp = merging(dfp,dft2[kcols],mc, mc,True,'inner')
dft2 = None

dfg = pd.read_excel(dataloc + 'geography/' + 'commuting_zones.xls')
dfg['fipscounty'] = dfg['FIPS'].apply(lambda x: float(str(x)[-3:]))
dfg['fipsstate'] = dfg['FIPS'].apply(lambda x: float(str(x)[:-3]))
rdict = {
    'Commuting Zone ID, 2000': 'cz',
}
mc = ['fipsstate', 'fipscounty']
kcols = mc + ['cz']
dfg = dfg.rename(columns=rdict)[kcols].drop_duplicates(subset=mc)
dfp = dfp.merge(dfg[kcols], on=mc, how='left', indicator=True)

dfp['naics4'] = dfp['NAICSCode'].astype('str').str[:4]
dfp['counter'] = 1
dfp['loan_year'] = pd.to_datetime(dfp['DateApproved']).dt.year

aggstats = {
    'counter' : 'sum',
    'CurrentApprovalAmount' : 'sum'
}
dfp1 = dfp.groupby(['fipsstate', 'cz', 'fipscounty', 'tract', 'naics4', 'loan_year'], as_index=False).agg(aggstats)

dfp1.to_csv(dataloc + 'pppfiles/'+filename + '.csv')